*----------------------------------------------------------------------------------------------------------	* 
* Silencing the Rails: A Study of the Noise-Safety Trade-off in Railway Quiet Zones                         *
* RESEARCHERS:		Emtiaz Hritan																		    *
* PROGRAMMED BY:	Emtiaz Hritan 					 											            *
* CREATED:			Oct. 25, 2023																		   	*
* LAST MODIFIED:	Oct. 7, 2025														       				*
*----------------------------------------------------------------------------------------------------------	*

clear all
set more off
* Set local paths
* Set this local datapath equal to the folder location for data
	local datapath "C:\Users\name\Replication Files Silencing the Rails\Data"
* Set this local outputpath equal to the folder location for outcome like tables
	local outputpath "C:\Users\name\Replication Files Silencing the Rails\Outcome"

* Install necessary packages
ssc install reghdfe
ssc install ftools
ssc install grstyle, replace
ssc install palettes, replace
ssc install colrspace, replace
ssc install ppmlhdfe, replace 
ssc install jwdid, replace 
ssc install csdid, replace 
ssc install drdid, replace 
ssc install did_imputation, replace
ssc install frause, replace
frause mpdta.dta, clear
ssc install hdfe, replace 
ssc install event_plot, replace
ssc install addplot, replace


* Clear everything
clear all
set maxvar 120000

****************************************************************************
*Table 2—:Baseline Results: Impact of Quietzone on Railway Crossing Accidents
****************************************************************************
cd "`datapath'"
******* TWFE(PPML): Column (1) ***********
use final 
// Calculate the count of accidents per railway crossing per year
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)
duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

eststo:ppmlhdfe accidents_per_year dty, a(crossingid year) vce(cluster crossingid)
estimates store dtwfe

******* TWFE(PPML): Column (2) ***********

*Lets merge this with state county data to have state fixed effects
merge m:1 crossingid using state_county.dta, force 
drop _merge
eststo:ppmlhdfe accidents_per_year dty, a(crossingid year statecode#year) vce(cluster crossingid)



******* TWFE(PPML): Column (3) ***********
clear all
use final 
// Calculate the count of accidents per railway crossing per year
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)
duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995


*Lets merge this with county data
merge m:1 crossingid using state_county_name.dta, force 
drop _merge
save county_accidents.dta, replace 
clear

use county_accidents
merge m:1 county_fips year using county_1990_2019.dta, force 
keep if _merge==3
drop _merge
save county_accidents_controls.dta, replace 


* Generate nonwhite population
gen non_white= tot_pop - white_pop 
* All possible controls: tot_pop tot_female non_white


local controls "tot_pop tot_female non_white" 

eststo:ppmlhdfe accidents_per_year dty `controls', a(crossingid year state#year) vce(cluster crossingid)

eststo:ppmlhdfe accidents_per_year dty `controls', a(crossingid year county#year) vce(cluster crossingid)






******* ETWM: Column (4) ***********

* Clear everything
clear all
set maxvar 120000
* Use the accident, all railway crossing and all quietzone dataset "final"
use final 

// Calculate the count of accidents per railway crossing per month
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

duplicates report crossingid year
duplicates drop crossingid year, force 
 
* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

****First I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson)
estat simple
estimates store jwdid_notyet
estat event 
jwdid_plot
jwdid_plot, style(rbar)
estat calendar
jwdid_plot


******* ETWM: Column (5) ***********


*Using Never treated as controls
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson) never 
estat simple
estimates store jwdid_never
estat event 
jwdid_plot
jwdid_plot, style(rbar)
jwdid_plot, pstyle1(p3)
estat calendar
jwdid_plot

eststo clear

* Note: For the new staggered models, I reported the results in the .tex files manually. Please refer to the "Latex Source Files" subfolder of "Outcome" folder. 

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------



****************************************************************************
*Figure 5 : Event study
****************************************************************************


                ******* Panel (a) Two-way fixed effects model ***********
clear all
use final, clear  
// Calculate the count of accidents per railway crossing per year
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)
duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

g time_to_treat = year - first_treat
tab time_to_treat
 // creating dummies for the lags 0..9, based on time_to_treat = number of periods since treatment (or missing if there is a never-treated group)
 
 replace time_to_treat=. if first_treat==0
  
   forvalues l = 0/13 {
                gen L`l'event = time_to_treat==`l'
        }
        gen L14event = time_to_treat>=14 // binning K=14 and above
        
        // creating dummies for the leads 1..14
   forvalues l = 0/13 { 
                gen F`l'event = time_to_treat==-`l'
        }
        gen F14event = time_to_treat<=-14 // binning K=-14 and below

        // running the event study regression. Drop leads 1 and 2 to avoid underidentification
        //if there is no never-treated group (could instead drop any others); see Borusyak et al. 2021
        ppmlhdfe accidents_per_year o.F1event o.F2event F3event-F14event L*event, a(crossingid year) vce(cluster crossingid)
               
        // plotting the coeffients
        event_plot, default_look stub_lag(L#event) stub_lead(F#event) together plottype(scatter) ///
                graph_opt(xtitle("Year since the Quietzone") ytitle("OLS coefficients") xlabel(-14(7)14 14))
        
    estimates store ols1 

graph export "ols_event.pdf", replace

******* Panel (b) Model proposed by Wooldridge(2021) ***********

* Clear everything
clear all
set maxvar 120000

* Use the accident, all railway crossing and all quietzone dataset "final"
use final 

// Calculate the count of accidents per railway crossing per month
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

duplicates report crossingid year
duplicates drop crossingid year, force 
 
* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995


*Using Never treated as controls
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson) never 
estat event, window(-10 15) estore(jw1) 
estimates save ejw, replace

jwdid_plot, style(rbar)


graph export "jwdid_event_10_15.pdf", replace
eststo clear

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------




****************************************************************************
*Table A7—: Results:the impact of quiet zones on accident Damage
****************************************************************************

******* Column (1) & (2) : TWFE Models***********
clear all
use final 

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)

// Calculate the count of accidents per railway crossing per year
egen damagecost_year = total(vehicledamagecost) if !missing(year), by(crossing_id year) 

duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep damagecost_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace damagecost_year=0 if damagecost_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

eststo:reghdfe damagecost_year dty, a(crossingid year) vce(cluster crossingid)
estimates store dtwfe

grstyle init
grstyle set plain, horizontal nogrid
coefplot jwdid_notyet jwdid_never bjs , keep(dty) xscale( range(-1 )) xline(0) title("Reading Z-scores")
esttab using output2.csv,  b(3) se(3) r2 
eststo clear

*Lets merge this with state county data to have state fixed effects
merge m:1 crossingid using state_county.dta, force 
drop _merge
eststo:reghdfe damagecost_year dty, a(crossingid year statecode#year) vce(cluster crossingid)





******* Column (3) & (4) : CSDID ***********

clear all
use final 

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

// Calculate the count of accidents per railway crossing per month
egen damagecost_year = total(vehicledamagecost) if !missing(year), by(crossingid year)
gen whistle_year=year(WhistleDate)

duplicates report crossingid year
duplicates drop crossingid year, force 

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep damagecost_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace damagecost_year=0 if damagecost_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

* Using csdid package 
csdid damagecost_year, ivar(crossingid) time(year) gvar(first_treat)
estat all

*Not yet treated 
csdid damagecost_year, ivar(crossingid) time(year) gvar(first_treat) notyet
estat all



******* Column (5) :  BJS ***********
* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
 gen Ei= first_treat
 replace Ei=. if Ei==0

did_imputation damagecost_year crossingid year Ei, fe(crossingid year)
estimates store bjs


******* Column (6) & (7):  ETWM ***********


****First I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid damagecost_year, ivar(crossingid) tvar(year) gvar(first_treat) 
estat simple
estimates store jwdid_notyet


*Using Never treated as controls
jwdid damagecost_year, ivar(crossingid) tvar(year) gvar(first_treat) never 
estat simple
estimates store jwdid_never
eststo clear
* Note: For the new staggered models, I reported the results in the .tex files manually. Please refer to the "Latex Source Files" subfolder of "Outcome" folder. 

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------



****************************************************************************
*Table A8—:Results:the impact of quietzones on accident killed
****************************************************************************

******* Column (1) & (2) : TWFE Models***********

clear all
use final 
// Calculate the count of accidents per railway crossing per year
egen killed_year = total(crossinguserskilled) if !missing(year), by(crossing_id year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)
duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep killed_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace killed_year=0 if killed_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

eststo:ppmlhdfe killed_year dty, a(crossingid year) vce(cluster crossingid)
estimates store dtwfe


*Lets merge this with state county data to have state fixed effects
merge m:1 crossingid using state_county.dta, force 
drop _merge
eststo:ppmlhdfe killed_year dty, a(crossingid year statecode#year) vce(cluster crossingid)



******* Column (3) & (4) : CSDID ***********
clear all
use final 

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

// Calculate the count of accidents per railway crossing per month
egen killed_year = total(crossinguserskilled) if !missing(year), by(crossingid year)
gen whistle_year=year(WhistleDate)

duplicates report crossingid year
duplicates drop crossingid year, force 

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep killed_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace killed_year=0 if killed_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995


* Using csdid package 
csdid killed_year, ivar(crossingid) time(year) gvar(first_treat)
estat all

*Not yet treated 
csdid killed_year, ivar(crossingid) time(year) gvar(first_treat) notyet
estat all


******* Column (5) :  BJS ***********

* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
gen Ei= first_treat
replace Ei=. if Ei==0
did_imputation killed_year crossingid year Ei, fe(crossingid year)
estimates store bjs


******* Column (6) & (7):  ETWM ***********

****First I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid killed_year, ivar(crossingid) tvar(year) gvar(first_treat) 
estat simple
estimates store jwdid_notyet


*Using Never treated as controls
jwdid killed_year, ivar(crossingid) tvar(year) gvar(first_treat) never 
estat simple
estimates store jwdid_never
eststo clear
* Note: For the new staggered models, I reported the results in the .tex files manually. Please refer to the "Latex Source Files" subfolder of "Outcome" folder. 

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------




****************************************************************************
*Table A9—:Results:the impact of quietzones on accident injured
****************************************************************************

******* Column (1) & (2) : TWFE Models***********
clear all
use final 
* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)

// Calculate the count of accidents per railway crossing per year
egen injured_year = mean(crossingusersinjured) if !missing(year), by(crossingid year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep injured_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace injured_year=0 if injured_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

eststo:ppmlhdfe injured_year dty, a(crossingid year) vce(cluster crossingid)
estimates store dtwfe

grstyle init
grstyle set plain, horizontal nogrid
coefplot jwdid_notyet jwdid_never bjs , keep(dty) xscale( range(-1 )) xline(0) title("Reading Z-scores")
esttab using output2.csv,  b(3) se(3) r2 
eststo clear

*Lets merge this with state county data to have state fixed effects
merge m:1 crossingid using state_county.dta, force 
drop _merge
eststo:ppmlhdfe injured_year dty, a(crossingid year statecode#year) vce(cluster crossingid)


******* Column (3) & (4) : CSDID ***********

clear all
use final 

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

// Calculate the count of accidents per railway crossing per month
egen injured_year = total(crossingusersinjured) if !missing(year), by(crossingid year)
gen whistle_year=year(WhistleDate)

duplicates report crossingid year
duplicates drop crossingid year, force 

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep injured_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace injured_year=0 if injured_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

* Using csdid package 

csdid injured_year, ivar(crossingid) time(year) gvar(first_treat)
estat all


*Not yet treated 
csdid injured_year, ivar(crossingid) time(year) gvar(first_treat) notyet
estat all



******* Column (5) :  BJS ***********
* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
gen Ei= first_treat
replace Ei=. if Ei==0
did_imputation injured_year crossingid year Ei, fe(crossingid year)
estimates store bjs



******* Column (6) & (7):  ETWM ***********

****First I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid injured_year, ivar(crossingid) tvar(year) gvar(first_treat) 
estat simple
estimates store jwdid_notyet

*Using Never treated as controls
jwdid injured_year, ivar(crossingid) tvar(year) gvar(first_treat) never 
estat simple
estimates store jwdid_never
eststo clear 
* Note: For the new staggered models, I reported the results in the .tex files manually. Please refer to the "Latex Source Files" subfolder of "Outcome" folder. 

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------




****************************************************************************
*Table A5—: Results 2005 cohort
****************************************************************************

******* Column (1) & (2) : TWFE Models***********
clear all
use final 
// Calculate the count of accidents per railway crossing per year
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)
duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
keep if first_treat==0 | first_treat==2005 
drop if year< 1995

eststo:ppmlhdfe accidents_per_year dty, a(crossingid year) vce(cluster crossingid)
estimates store dtwfe

*Lets merge this with state county data to have state fixed effects
merge m:1 crossingid using state_county.dta, force 
drop _merge
eststo:ppmlhdfe accidents_per_year dty, a(crossingid year statecode#year) vce(cluster crossingid)



******* Column (3) & (4) : CSDID ***********
clear all
use final 
// Calculate the count of accidents per railway crossing per month
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

duplicates report crossingid year
duplicates drop crossingid year, force 
 
* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
keep if first_treat==0 | first_treat==2005 
drop if year< 1995


* Using csdid package 

csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat)
estat all

*Not yet treated 
csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat) notyet
estat all


******* Column (5) :  BJS ***********

* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
gen Ei= first_treat
replace Ei=. if Ei==0
did_imputation accidents_per_year crossingid year Ei, fe(crossingid year)
estimates store bjs


******* Column (6) & (7):  ETWM ***********

****First I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson)
estat simple
estimates store jwdid_notyet


*Using Never treated as controls
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson) never 
estat simple
estimates store jwdid_never
eststo clear
* Note: For the new staggered models, I reported the results in the .tex files manually. Please refer to the "Latex Source Files" subfolder of "Outcome" folder. 

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------



****************************************************************************
* Table A6—: Results Post 2005 Cohort
****************************************************************************

******* Column (1) & (2) : TWFE Models***********
clear all
use final 
// Calculate the count of accidents per railway crossing per year
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)

* Let's get the date of whistle ban in stata format
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:
egen crossingid = group(crossing_id)
duplicates report crossingid year
duplicates drop crossingid year, force 

* Teatment is defined as the railway crossing that has the established quietzone
gen treatment =0 
replace treatment =1 if quietzone==1

* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat treatment 
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

*Do the same for treatment variable
// Create variables indicating if there is any observation with treatment=1 or treatment=0 within the same crossing_id
egen any_treatment_1 = max(treatment == 1), by(crossingid)
egen any_treatment_0 = max(treatment == 0), by(crossingid)

// Replace missing treatment values with 1 if any_treatment_1 is 1
replace treatment = 1 if missing(treatment) & any_treatment_1 == 1

// Replace missing treatment values with 0 if any_treatment_0 is 1
replace treatment = 0 if missing(treatment) & any_treatment_0 == 1

// Drop the temporary variables
drop any_treatment_1 any_treatment_0

* Generate time dummy variable- post for observations occuring after first time becoming "Quiet Zone"
gen post_time=0 
replace post_time= 1 if year > first_treat
replace post_time=0 if first_treat==0

* Create treated
gen dty = 0
replace dty =1 if treatment==1 & post_time==1

xtset crossingid year

* Drop data before November, 1994
keep if first_treat==0 | first_treat>2005 
drop if year< 1995

eststo:ppmlhdfe accidents_per_year dty, a(crossingid year) vce(cluster crossingid)
estimates store dtwfe

grstyle init
grstyle set plain, horizontal nogrid
coefplot jwdid_notyet jwdid_never bjs , keep(dty) xscale( range(-1 )) xline(0) title("Reading Z-scores")
esttab using output2.csv,  b(3) se(3) r2 
eststo clear

*Lets merge this with state county data to have state fixed effects
merge m:1 crossingid using state_county.dta, force 
drop _merge
eststo:ppmlhdfe accidents_per_year dty, a(crossingid year statecode#year) vce(cluster crossingid)


******* Column (3) & (4) : CSDID ***********
clear all
use final 
// Calculate the count of accidents per railway crossing per month
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

duplicates report crossingid year
duplicates drop crossingid year, force 
 
* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
keep if first_treat==0 | first_treat>2005 
drop if year< 1995

* Using csdid package 
csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat)
estat all

*Not yet treated 
csdid accidents_per_year, ivar(crossingid) time(year) gvar(first_treat) notyet
estat all


******* Column (5) :  BJS ***********
* Using did_imputation package based on Borusyak, Kirill, Xavier Jaravel, and Jann Spiess (2023). "Revisiting Event Study Designs: Robust and Efficient Estimation," Working paper.
gen Ei= first_treat
replace Ei=. if Ei==0
did_imputation accidents_per_year crossingid year Ei, fe(crossingid year)
estimates store bjs



******* Column (6) & (7):  ETWM ***********

****First I use jwdid package to implmemnt extended two-way Mundlak (ETWM) regression of "Wooldridge, Jeffrey. 2021.  Two-Way Fixed Effects, the Two-Way Mundlak Regression, and Differences-in-Differences estimators. Working paper."
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson)
estat simple
estimates store jwdid_notyet


*Using Never treated as controls
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson) never 
estat simple
estimates store jwdid_never
eststo clear
* Note: For the new staggered models, I reported the results in the .tex files manually. Please refer to the "Latex Source Files" subfolder of "Outcome" folder. 
*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------


****************************************************************************
* Figure A2 & A3. :Event study estimates showing the effect of quiet zones on fatal accidents
****************************************************************************

* Clear everything
clear all
set maxvar 120000

* Use the accident, all railway crossing and all quietzone dataset "final"
use final 

// Calculate the count of accidents per railway crossing per month
egen accidents_per_year = total(accident) if !missing(year), by(crossing_id year)
gen whistle_year=year(WhistleDate)

* Crossing IDs are string.Let's change it:

egen crossingid = group(crossing_id)

duplicates report crossingid year
duplicates drop crossingid year, force 
 
* Let's define "gvar" first. gvar=0 if never treated and gvar = year first treated if ever treated.
gen first_treat= whistle_year
replace first_treat=0 if quietzone ==0 

* Keep only the important variable
keep accidents_per_year crossingid year first_treat
* Let's balance this data using tsfill
xtset crossingid year
tsfill, full
replace accidents_per_year=0 if accidents_per_year==.
* Let's replace first_treat
// Sort your dataset by id and year
sort crossingid year

// Create a new variable that checks if income is zero for any year for each id
egen nonmissing_other_year = min(first_treat), by(crossingid)

// Replace missing values of 'other_year' with non-missing values for the same id
replace first_treat = nonmissing_other_year if missing(first_treat)

// Drop the temporary variable nonmissing_other_year
drop nonmissing_other_year

xtset crossingid year

* Drop data before November, 1994
drop if year< 1995

* JWDID
*Estimation of ATTGTs without controls using not-yet treated groups
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson)
*Figure A2 Panel (a)
estat event, window(-10 10) estore(jw1) 
jwdid_plot, style(rbar)
graph export "jwdid_event_notyet.eps", replace
*Figure A3 Panel (a)
estat calendar
jwdid_plot
graph export "jwdid_calender_notyet.eps", replace
*Using Never treated as controls
jwdid accidents_per_year, ivar(crossingid) tvar(year) gvar(first_treat) method(poisson) never 
*Figure A2 Panel (b)
estat event, window(-10 15) estore(jw2) 
estimates save ejw, replace
jwdid_plot, style(rbar)
graph export "jwdid_event_never.eps", replace
*Figure A3 Panel (b)
estat calendar
jwdid_plot
graph export "jwdid_calender_never.eps", replace

eststo clear

*--------------------------------------------------------------------------
* END
*--------------------------------------------------------------------------














